##### restructuring SEP wave data for WTP analysis (robustness checks) ######
################################################################

# Intentionally no rm(list = ls()) here: it would wipe the caller's workspace
# whenever this file is source()d, yet it neither detaches packages nor resets
# options. Every object used below is created by this script before it is
# read, so run the script in a fresh R session if a clean state is needed.

# Load Packages 
library(tidyverse)
library(haven)
library(labelled)
library(data.table)
library(stringr)
library(cregg)


# Read the wave-8 panel export (Stata .dta) into a plain data.frame.
# zap_formats() strips the Stata display formats and zap_label() strips the
# variable labels; value labels are left untouched.
w8 <- as.data.frame(
  zap_label(
    zap_formats(
      read_dta("/Volumes/1709-UmweltPanel/data/w8/final_data/upanel_w8.dta")
    )
  )
)

# Conjoint working data.
# Rows: respondents in survey arm 1 (per the original note, this also removes
# unit non-response).
# Columns: respondent id, every column whose name contains "q32" or "conj",
# minus the r1attr..r4attr columns that record the order in which attributes
# were shown.
df_all <- w8 %>%
  filter(w8_surveyarm == 1) %>%
  select(
    PubId,
    contains("q32"),
    contains("conj"),
    -contains("r1attr"),
    -contains("r2attr"),
    -contains("r3attr"),
    -contains("r4attr")
  )



# data prep ---------------------------------------------------------------

# Keep complete cases only. A row survives only if every column compares
# >= 0: rows with a negative value are removed, and so are rows with an NA
# (filter() discards rows whose condition evaluates to NA).
# NOTE(review): presumably negative values are the survey's missing-value
# codes -- confirm against the codebook.
df <- df_all %>%
  filter(if_all(everything(), ~ .x >= 0))

# Rows removed by the filter above, kept for inspection. Listing all columns
# in `by` is the same natural join anti_join() would pick on its own, but
# without the "Joining with `by = ...`" message.
dropped <- anti_join(df_all, df, by = names(df_all))

# drop product randomisation from main df ---------------------------------
# Main working copy: everything except the product-randomisation indicator
df_final <- select(df, -w8_treat_conj_buy)

# Attribute columns only (names containing "w8_conjoint_buy_r"); used as the
# source for the coding scheme of each attribute variable
labels <- select(df, contains("w8_conjoint_buy_r"))

# Build a lookup table from the value labels of one labelled column:
# `code` holds the numeric codes, `attr` the label text attached to each code.
value_label_table <- function(x) {
  lbl <- val_labels(x)
  data.frame(
    code = as.numeric(lbl),
    attr = names(lbl)
  )
}

# One coding scheme per attribute, read from the round-1 / product-1 column.
# NOTE(review): assumes the other rounds and products share the same value
# labels -- confirm.
conj_attr1 <- value_label_table(labels$w8_conjoint_buy_r1prod1price)
conj_attr2 <- value_label_table(labels$w8_conjoint_buy_r1prod1life)
conj_attr3 <- value_label_table(labels$w8_conjoint_buy_r1prod1env)
conj_attr4 <- value_label_table(labels$w8_conjoint_buy_r1prod1eff)
conj_attr5 <- value_label_table(labels$w8_conjoint_buy_r1prod1recy)
conj_attr6 <- value_label_table(labels$w8_conjoint_buy_r1prod1rep)


# data wrangling ----------------------------------------------------------

dfconj <- df_final

### Stack every column except the respondent id into (variable, value) pairs,
### giving one row per respondent x original column
dfconj_long <- pivot_longer(
  dfconj,
  cols = -PubId,
  names_to = "variable",
  values_to = "value"
)

# Store the stacked values as plain numerics
dfconj_long$value <- as.numeric(dfconj_long$value)

### Add product, round, variable type indicators 

# Derive product number, round number and variable type from the original
# column name, then drop the name itself.
dfconj_long <- dfconj_long %>%
  mutate(
    # Product number: the digit of the first "d<digit>" in the name
    # (e.g. the "d1" of "prod1"); NA when the name has no such pattern
    product = as.numeric(str_sub(str_extract(variable, "d\\d"), 2)),
    # Round number: the digit of the first "r<digit>" in the name; NA for
    # names without it (filled in for the choice columns further down)
    round = as.numeric(str_sub(str_extract(variable, "r\\d"), 2)),
    # Attribute name: trailing lowercase letters that directly follow a digit.
    # paste0() turns a non-match (NA) into the string "NA", so `vars` never
    # contains a real NA in the str_detect() calls below
    vars = paste0(str_extract(variable, "(?<=\\d)[a-z]+$")),
    # Choice columns (q32x1..q32x4) get a temporary tag that carries the round
    vars = case_when(
      str_detect(variable, "q32x1") ~ "choice1",
      str_detect(variable, "q32x2") ~ "choice2",
      str_detect(variable, "q32x3") ~ "choice3",
      str_detect(variable, "q32x4") ~ "choice4",
      TRUE ~ vars
    ),
    # Read the round of a choice row from that tag; other rows keep theirs
    round = case_when(
      str_detect(vars, "choice1") ~ 1,
      str_detect(vars, "choice2") ~ 2,
      str_detect(vars, "choice3") ~ 3,
      str_detect(vars, "choice4") ~ 4,
      TRUE ~ round
    ),
    # Collapse the four temporary tags into a single "choice" variable type
    vars = if_else(
      str_detect(vars, "choice1|choice2|choice3|choice4"),
      "choice",
      vars
    )
  ) %>%
  # The original column name is now fully encoded in product / round / vars
  select(-variable)

# Widen: one column per variable type. This yields one row per
# respondent-round-product holding the attribute values, plus one extra row
# per respondent-round (product = NA) that holds only the recorded choice.
dfconj <- dfconj_long %>%
  pivot_wider(names_from = vars, values_from = value)

### Bring attributes and choice together in one row per
### respondent-round-product

dfconj <- dfconj %>%
  group_by(PubId, round) %>%
  # Copy the round's recorded choice onto every row of that round. Exactly one
  # non-missing choice is required per respondent-round; anything else means
  # the reshaping above went wrong, so fail with a clear message.
  mutate(choice = {
    recorded <- choice[!is.na(choice)]
    if (length(recorded) != 1L) {
      stop(
        "Expected exactly one recorded choice per respondent-round, found ",
        length(recorded), " (PubId ", PubId[1], ", round ", round[1], ").",
        call. = FALSE
      )
    }
    recorded
  }) %>%
  ungroup() %>%
  # Binary indicator: 1 if this row's product is the chosen one, 0 otherwise
  mutate(choice = as.numeric(product == choice)) %>%
  # Drop the choice-only rows; one row per respondent-round-product remains
  filter(!is.na(product))


# quick check -------------------------------------------------------------


# Manually set number of conjoint rounds and products per round
nrounds <- 4
nproduct <- 2

# Enforce the checks: bare comparisons at top level are not printed when the
# script is source()d, so a failed check would otherwise go unnoticed and the
# output files would still be written.
stopifnot(
  # One row per respondent x round x product
  nrow(df_final) * nrounds * nproduct == nrow(dfconj),
  # For every product a respondent chose, the other (nproduct - 1) products
  # of that round were not chosen
  sum(dfconj$choice == 1) * (nproduct - 1) == sum(dfconj$choice == 0)
)

# Drop unused levels
dfconj <- droplevels(dfconj)


# Attach the product-randomisation indicator to every conjoint row

# Respondent id plus the randomisation code
df2 <- select(df, PubId, w8_treat_conj_buy)

# Full outer join on PubId (base merge() returns a data.frame sorted by the
# key), then rename the indicator and turn its codes 1 / 2 / 3 into factor
# levels with readable names
dfconj <- merge(x = dfconj, y = df2, by = "PubId", all = TRUE) %>%
  rename(product_type = w8_treat_conj_buy) %>%
  mutate(
    product_type = recode(
      as.factor(product_type),
      "1" = "Smartphone",
      "2" = "TV",
      "3" = "Washing machine"
    )
  )

# Write the final conjoint data twice: as CSV (write.csv() defaults, so row
# names are included) and as an .RData file containing the object `dfconj`
write.csv(x = dfconj, file = "/Users/cbrugge/Desktop/CE Paper/wtp_data_ce.csv")
save(list = "dfconj", file = "/Users/cbrugge/Desktop/CE Paper/wtp_data_ce.RData")



